Started: 2020-09-10
Last edited: 2020-11-25 13:41:17
library(tidyverse)
# single cell
library(Seurat)
# plotting
library(patchwork)
library(ggthemes)
Alice had performed bulk-RNA seq on the placenta sample from COVID-19 positive mothers and control placentas and has a list of genes that were differentially expressed. Here we will use the single-cell RNA seq data to figure out in which cell types those DE genes are expressed.
I have already annotated the clusters. The annotated data (a seurat object) was saved as .rds, which we have to read.
seur <- readRDS("../data/seurat-object_annotated.rds")
# set active assay
DefaultAssay(seur) <- "SCT"
# set levels for idents (use merged_annotations)
clust.order <- c("dec.DSC", "dec.Endo", "dec.SMC", "dec.FB", "vil.FB", "vil.EVT", "vil.SCT", "vil.VCT", "vil.Ery", "vil.Hofb", "APC", "Bcell", "Gran", "Mono_1", "Mono_2", "NK_1", "NK_2", "NK_3", "Tcell_1", "Tcell_2", "Tcell_3")
seur@meta.data$annotation_merged <- factor(seur@meta.data$annotation_merged,
levels = clust.order)
# set idents
Idents(seur) <- seur@meta.data$annotation_merged
Below are the DE genes sent by Alice.
genes <- list()
genes$focused <- c("HSPA1A", "PPP1R11", "LY6GLY6C", "ITGAX", "IFITM1", "C1QC", "CCL2", "OAS3", "MX1")
genes$analysis1 <- c("HSPA1A", "FMC1-LUC7L2", "HSPA1B", "AC011511.4", "PPP1R11", "AL139300.1", "LY6GLY6C")
genes$analysis2 <- c("ITGAX", "OAS3", "IFITM1", "MX1", "C1QC", "MX2", "CCL2")
genes$additional <- c("C1QTNF2", "LYVE1", "TREM1", "FOLR2", "C1QB", "CCL2", "TNFRSF10C", "CXCL9", "IL1R2", "IL36A", "CD28", "OAS1", "IL1RN", "CD36", "CXCR2", "SERPING1", "CXCR1", "TNFRSF10C", "C1QA", "HCST", "IL36A", "IL4R", "LY96", "IL1R2", "CXCR2", "CXCL2", "S100A7", "IFITM3", "SELENOM", "SELENOP", "C3AR1", "CCL2", "CCL8")
genes$entry <- c("ACE2", "TMPRSS2", "BSG", "DPP4", "CTSL", "CTSB", "FURIN")
genes$new <- c("FCGRT", "GPX1", "GPX3", "GPX4", "DIO3", "TXNRD1", "TXNRD2", "TXNRD3")
all.genes <- c(genes$focused, genes$analysis1, genes$analysis2, genes$additional, genes$entry, genes$new) %>%
unique() %>%
sort()
# dotplot function
DotPlot2 <- function(object = seur, assay = "SCT", features, title = "", ...) {
p <- DotPlot(object,
assay = assay,
features = features,
dot.scale = 4,
dot.min = 0.01,
...) +
coord_flip() +
labs(caption = paste0("sctransform normalized expression"), title = title) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(size = 0.1),
legend.key.size = unit(0.75, "line"))
return(p)
}
DotPlot2(features = all.genes)
The following requested variables were not found: AC011511.4, AL139300.1, FMC1-LUC7L2, GPX1, LY6GLY6C
cowplot::ggsave2(last_plot(),
filename = "../results/03_candidate-genes/plots/dotplot_all-genes.pdf",
width = 6.5, height = 8.5, units = "in")
# violin function
VlnPlot2 <- function(object = seur, assay = "SCT", feature, ...) {
p <- VlnPlot(object = object,
features = feature,
assay = assay,
same.y.lims = FALSE,
split.by = "covid",
split.plot = TRUE,
pt.size = 0,
...) +
coord_cartesian(clip = "off") +
theme_classic() +
theme(
plot.title = element_text(size = 7, colour = "black"),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(size = 0.2),
axis.line = element_line(size = 0.25),
axis.ticks = element_line(size = 0.25),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5,
size = 6, color = "black"),
axis.title.x = element_blank(),
axis.title.y = element_text(size = 6, color = "black"),
axis.text.y = element_text(size = 6, color = "black"),
legend.key.size = unit(0.50, "lines"),
legend.position = c(1, 1),
legend.direction = "horizontal",
legend.justification = c(1, 0),
legend.text = element_text(size = 6, colour = "black")
)
p$layers[[1]]$aes_params$size = 0.25 # violin stroke
return(p)
}
# umap function
FeaturePlot2 <- function(object = seur, feature, ...) {
DefaultAssay(object) <- "SCT"
FeaturePlot(object,
feature = feature,
split.by = "covid",
pt.size = 0.1,
order = TRUE,
min.cutoff = "q10",
combine = TRUE,
...) &
plot_annotation(title = feature) &
theme_bw() +
theme(panel.grid.major = element_line(size = 0.25),
panel.grid.minor = element_blank(),
legend.key.size = unit(0.50, "line"),
legend.text = element_text(size = 6, angle = 90, hjust = 1),
legend.position = "bottom",
axis.title = element_text(size = 6),
plot.title = element_text(size = 7),
axis.title.y.right = element_blank(),
axis.text = element_blank(),
aspect.ratio = 1)
}
for(i in all.genes[all.genes %in% rownames(seur)]) {
vplot <- VlnPlot2(feature = i)
fplot <- FeaturePlot2(feature = i, max.cutoff = "q99")
cowplot::ggsave2(vplot,
filename = paste0("../results/03_candidate-genes/plots/", i, "_violinplot.pdf"),
width = 3.5, height = 2, units = "in")
cowplot::ggsave2(fplot,
filename = paste0("../results/03_candidate-genes/plots/", i, "_on-umap.png"),
width = 3.5, height = 2.5, units = "in")
vf <- vplot + fplot +
plot_layout(ncol = 1, widths = c(1, 0.5))
print(vf)
}
The default behaviour of split.by has changed.
Separate violin plots are now plotted side-by-side.
To restore the old behaviour of a single split violin,
set split.plot = TRUE.
This message will be shown once per session.
For some genes, the umap plots and violin plots seem to disagree with each other. That is, the gene is expressed at higher level in “covid” than “control” according to the violin plot, but the colours appear stronger for “control” on umap plots. I think this is simply because of the differing number of cells from “control” and “covid”. We have fewer cells from “covid” samples (as is evident also on the umap plots), so the lightness or sparseness of blue on umap plots just reflect that. Violin plots on the other hand scale the width of violins to the total number of cells in each group, so this difference is not seen in violin plots.
## data -----------------------------------------------------------------------
celltypes <- unique(seur$annotation_merged) %>% sort()
de.genes <- list()
for(i in celltypes){
de.genes[[i]] <- read.csv(
paste0("../results/04_de-genes-by-celltype/logfc_0.40/de/files/de-genes_", i, ".csv")
)
de.genes[[i]] <- dplyr::rename(de.genes[[i]], "gene" = "X")
de.genes[[i]]$celltype <- i
}
## create celltype_covid ident to use for seurat dotplot ----------------------
seur$celltype_covid <- paste0(seur$annotation_merged, "_", seur$covid)
## theme for DE genes faceted dotplot -----------------------------------------
theme_dotplot <- theme(
plot.margin = margin(5.5, 5.5, 1, 5.5),
panel.background = element_blank(),
text = element_text(color = "Black"),
panel.grid = element_blank(),
panel.border = element_rect(fill = NA, size = 0.25, color = "grey"),
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 5, color = "Black"),
axis.text.y = element_text(size = 5, color = "Black"),
axis.title = element_blank(),
axis.ticks = element_line(size = 0.25, color = "grey"),
strip.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5,
size = 5.25, color = "Black"),
strip.background = element_blank(),
legend.title = element_text(size = 5.25),
legend.position = "bottom",
legend.direction = "horizontal",
legend.box = "horizontal",
legend.background = element_blank(),
legend.box.spacing = unit(0.5, "lines"),
legend.title.align = 0,
legend.text = element_text(size = 5),
legend.key.size = unit(0.5, "lines"),
legend.key = element_blank(),
panel.spacing.x = unit(0, "lines")
)
## dotplot function -----------------------------------------------------------
plot_splitdot <- function(object, features, exclude.celltypes = c()) {
# idents combined for celltype and covid
Idents(object) <- object$celltype_covid
# run seurat's dotplot
p <- Seurat::DotPlot(
object = object,
assay = "SCT",
features = features
)
# extract plotting data
p.dat <- p$data
p.dat$celltype <- gsub(p.dat$id, pattern = "(.+)_([a-z]{5})", replacement = "\\1")
p.dat$covid <- gsub(p.dat$id, pattern = "(.+)_([a-z]{5})", replacement = "\\2")
# subset for celltypes of interest
p.sub <- p.dat[!(p.dat$celltype %in% exclude.celltypes), ]
# redo scaling for selected celltype_covid idents
p.wide <- pivot_wider(p.sub,
id_cols = features.plot,
names_from = id,
values_from = avg.exp) %>%
as.data.frame()
rownames(p.wide) <- p.wide$features.plot
p.wide$features.plot <- NULL
p.scaled <- t(p.wide) %>%
scale(center = TRUE, scale = TRUE) %>%
t() %>%
as.data.frame()
p.scaled$features.plot <- rownames(p.scaled)
# melt rescaled avg.exp
p.scaled.long <- pivot_longer(
data = p.scaled,
cols = names(p.scaled)[names(p.scaled) != "features.plot"],
names_to = "id",
values_to = "scaled.new")
# cap min and max scaled value; same as col.min and col.max in Seurat::DotPlot
p.scaled.long$scaled.new[p.scaled.long$scaled.new > 2.5] <- 2.5
p.scaled.long$scaled.new[p.scaled.long$scaled.new < -2.5] <- -2.5
# join with p.dat to get pct.exp and scaled.new in the same df
plot.dat <- dplyr::inner_join(x = p.dat,
y = p.scaled.long,
by = c("features.plot", "id"))
# clustering for ordering
cdat <- pivot_wider(data = plot.dat, id_cols = "features.plot",
names_from = id,
values_from = scaled.new) %>%
as.data.frame()
rownames(cdat) <- cdat$features.plot
cdat$features.plot <- NULL
cor.matrix <- cor(t(cdat))
# reorder correlation matrix based on clustering
dd <- as.dist((1 - cor.matrix))
hc <- hclust(dd, method = 'complete')
cdat <- cdat[hc$order, ]
# reorder factor levels based on clustering
plot.dat$features.plot <- factor(plot.dat$features.plot,
levels = rownames(cdat))
# prepare annotation data (hpline for DE genes)
de.ann <- de.genes
de.ann <- do.call(rbind, de.ann)
de.ann <- subset(de.ann,
gene %in% plot.dat$features.plot &
celltype %in% plot.dat$celltype
)
de.ann$diff.exp <- de.ann$p_val_adj < 0.05
de.ann$diff.exp <- tolower(as.character(de.ann$diff.exp))
# plot
q <- ggplot(data = plot.dat,
aes(x = covid, y = features.plot)) +
geom_point(aes(fill = scaled.new, size = pct.exp),
shape = 21, stroke = 0,
color = "White") +
scale_fill_gradient(low = "White",
high = "#d94801",
name = "avg. exp. (scaled)",
guide = guide_colorbar(
title.position = "top",
title.hjust = 0.5,
ticks.colour = "black"
)
) +
scale_x_discrete(breaks = c("cntrl", "covid"),
labels = c("ctrl", "COVID")) +
scale_size_area(max_size = 2.5,
name = "% cells with exp.",
guide = guide_legend(
override.aes = list(
color = "White",
fill = "Grey30"
),
title.position = "top", title.hjust = 0.5
)
) +
facet_grid(. ~ celltype) +
ungeviz::geom_hpline(data = de.ann,
aes(y = gene, x = 1.5, color = diff.exp),
size = 0.30, width = 1, lineend = "butt") +
scale_color_manual(breaks = "true",
labels = "< 0.05",
values = "Black",
name = "DE adj.p",
guide = guide_legend(title.position = "top",
title.hjust = 0.5)) +
theme_dotplot
return(q)
}
# plot new gene list
splitdot1 <- plot_splitdot(
object = seur,
features = genes$new
)
The following requested variables were not found: GPX1
cowplot::ggsave2(
splitdot1,
filename = "../results/03_candidate-genes/plots/splitdot_set1.pdf",
width = 3.75, height = 2, units = "in"
)
splitdot1
avg <- AverageExpression(seur, assays = "SCT") %>% .$SCT
Finished averaging SCT for cluster dec.DSC
Finished averaging SCT for cluster dec.Endo
Finished averaging SCT for cluster dec.SMC
Finished averaging SCT for cluster dec.FB
Finished averaging SCT for cluster vil.FB
Finished averaging SCT for cluster vil.EVT
Finished averaging SCT for cluster vil.SCT
Finished averaging SCT for cluster vil.VCT
Finished averaging SCT for cluster vil.Ery
Finished averaging SCT for cluster vil.Hofb
Finished averaging SCT for cluster APC
Finished averaging SCT for cluster Bcell
Finished averaging SCT for cluster Gran
Finished averaging SCT for cluster Mono_1
Finished averaging SCT for cluster Mono_2
Finished averaging SCT for cluster NK_1
Finished averaging SCT for cluster NK_2
Finished averaging SCT for cluster NK_3
Finished averaging SCT for cluster Tcell_1
Finished averaging SCT for cluster Tcell_2
Finished averaging SCT for cluster Tcell_3
avg$gene_name <- rownames(avg)
avg <- relocate(avg, gene_name)
write.csv(avg, "../results/03_candidate-genes/mean-expression_assay-sct_slot-data.csv", row.names = FALSE)
sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Mojave 10.14.6
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggthemes_4.2.0 patchwork_1.0.1 Seurat_3.2.2 forcats_0.5.0 stringr_1.4.0 dplyr_1.0.2 purrr_0.3.4
[8] readr_1.3.1 tidyr_1.1.2 tibble_3.0.4 ggplot2_3.3.2 tidyverse_1.3.0
loaded via a namespace (and not attached):
[1] Rtsne_0.15 colorspace_2.0-0 deldir_0.1-28 ellipsis_0.3.1
[5] ggridges_0.5.2 fs_1.5.0 rstudioapi_0.12 spatstat.data_1.4-3
[9] farver_2.0.3 leiden_0.3.3 listenv_0.8.0 ggrepel_0.8.2
[13] fansi_0.4.1 lubridate_1.7.9 xml2_1.3.2 codetools_0.2-16
[17] splines_4.0.2 knitr_1.30 polyclip_1.10-0 jsonlite_1.7.1
[21] broom_0.7.0 ica_1.0-2 cluster_2.1.0 dbplyr_1.4.4
[25] png_0.1-7 uwot_0.1.8 shiny_1.5.0 sctransform_0.3.1.9002
[29] compiler_4.0.2 httr_1.4.2 backports_1.2.0 assertthat_0.2.1
[33] Matrix_1.2-18 fastmap_1.0.1 lazyeval_0.2.2 cli_2.1.0
[37] later_1.1.0.1 htmltools_0.5.0 tools_4.0.2 rsvd_1.0.3
[41] igraph_1.2.5 gtable_0.3.0 glue_1.4.2 RANN_2.6.1
[45] reshape2_1.4.4 Rcpp_1.0.5 spatstat_1.64-1 cellranger_1.1.0
[49] vctrs_0.3.4 nlme_3.1-149 lmtest_0.9-38 xfun_0.19
[53] globals_0.13.1 rvest_0.3.6 strapgod_0.0.4.9000 mime_0.9
[57] miniUI_0.1.1.1 lifecycle_0.2.0 irlba_2.3.3 goftest_1.2-2
[61] future_1.20.1 MASS_7.3-52 zoo_1.8-8 scales_1.1.1
[65] hms_0.5.3 promises_1.1.1 spatstat.utils_1.17-0 parallel_4.0.2
[69] RColorBrewer_1.1-2 yaml_2.2.1 reticulate_1.16 pbapply_1.4-3
[73] gridExtra_2.3 rpart_4.1-15 stringi_1.5.3 rlang_0.4.8
[77] pkgconfig_2.0.3 matrixStats_0.57.0 evaluate_0.14 lattice_0.20-41
[81] ROCR_1.0-11 tensor_1.5 labeling_0.4.2 htmlwidgets_1.5.1
[85] cowplot_1.1.0 tidyselect_1.1.0 parallelly_1.21.0 RcppAnnoy_0.0.16
[89] plyr_1.8.6 magrittr_1.5 R6_2.5.0 generics_0.0.2
[93] DBI_1.1.0 pillar_1.4.6 haven_2.3.1 withr_2.3.0
[97] mgcv_1.8-31 fitdistrplus_1.1-1 survival_3.2-3 abind_1.4-5
[101] future.apply_1.6.0 modelr_0.1.8 crayon_1.3.4 KernSmooth_2.23-17
[105] plotly_4.9.2.1 ungeviz_0.1.0 rmarkdown_2.3 readxl_1.3.1
[109] grid_4.0.2 data.table_1.13.0 blob_1.2.1 reprex_0.3.0
[113] digest_0.6.27 xtable_1.8-4 httpuv_1.5.4 munsell_0.5.0
[117] viridisLite_0.3.0